section .bss
    ; Trail intensity field: 128 rows of 256 one-byte cells stored row by row,
    ; so the offset of a cell is (row * 256) + column.
    trailmap:  resb 128 * 256

    ; Particle table: 256 records of 4 bytes each, in the order
    ; posx, posy, velx, vely. It starts directly after trailmap.
    particles: resb 4 * 256


section .text
    org 0x100             ; Origin for .COM files

;;; Program entry (DOS .COM). Clears the trail map, switches to mode 13h,
;;; seeds the particles, hooks the timer tick and then idles forever while
;;; I_main does all the work from the interrupt.

cld                         ; string stores below must walk upwards

;;; clear_trailmap
; Do not rely on the register values DOS hands over: zero AL explicitly.
xor al, al
mov di, trailmap
mov cx, 256*128
rep stosb
;;; end clear_trailmap

; Set video mode to 13h (320x200); AH = 0 selects the "set mode" function
mov ax, 0x0013
int 0x10

; Seed every particle byte (posx, posy, velx, vely) with pseudo-random data.
; This happens before the timer hook is installed so that I_main never sees
; half-initialised particles. DI is loaded explicitly instead of assuming it
; survived the BIOS call above.
; The generator state lives in BX and the HIGH byte is stored: the low byte
; of a 16-bit LCG repeats every 256 steps, which would make particle i and
; particle i+64 identical forever.
mov di, particles
mov cx, 4*256
mov bx, 0x251c              ; arbitrary fixed seed
L_test_fill:
    imul bx, bx, 1337
    add bx, 7331
    mov al, bh
    stosb
    loop L_test_fill

; Set our custom ISR for int 0x1c (DOS "set interrupt vector", DS:DX = handler)
mov ax, 0x2500 + 0x1c
mov dx, I_main
int 0x21


; Nothing left to do in the foreground: sleep until the next interrupt.
halt:
    hlt
    jmp halt



I_main: ; INT 1Ch handler, called at 18.2 Hz by the timer interrupt.
        ; One call performs one simulation step:
        ;   1. horizontal blur of trailmap (in place, wrapping within each row)
        ;   2. sense / turn / move / deposit for each of the 256 particles
        ;   3. copy of trailmap to the centre of the mode 13h screen
        ; Every register of the interrupted code is preserved.

    ; We run in the middle of somebody else's code, so save everything we touch.
    pusha
    push ds
    push es

    ; DS is not guaranteed to be our segment inside an interrupt; in a .COM
    ; program the data lives in the code segment, so take it from CS.
    push cs
    pop ds

    ; lodsb/stosb below must count upwards regardless of the interrupted code
    cld

    ;;;; horizontal blur

        ; DS:SI reads trailmap, ES:DI writes trailmap (the blur is in place).
        ; Each output pixel is (left + 2*centre + right) / 4, where left and
        ; right wrap around inside the same row.

        push ds
        pop es

        mov si, trailmap
        mov di, si

        mov cx, 128 ; 128 rows
        L_horiz_blur_row:
            push cx

            mov dl, [si] ; dl = first pixel of the row, kept as the right
                         ; neighbour of the last column

            mov dh, [si+255] ; dh = previous (left) pixel; starts as the last
                             ; pixel of the row for the first column

            mov cx, 256
            L_horiz_blur_pixel:

                xor ah, ah
                xor bh, bh

                mov al, dh      ; ax = left neighbour (original value)

                mov bl, [si]    ; bx = centre pixel
                add ax, bx
                add ax, bx      ; ax = left + 2*centre

                mov dh, bl      ; remember the unblurred centre as next "left"


                mov bl, dl      ; right neighbour for the last column only
                inc si

                cmp cx, 1       ; cx is 1 while processing the last column
                je skipreadnext
                mov bl, [si]    ; right neighbour; not yet overwritten because
                                ; the write pointer is one pixel behind
                skipreadnext:

                add ax, bx

                shr ax, 2       ; at most (4*255)/4, so the result fits in al
                stosb
                loop L_horiz_blur_pixel


            pop cx
            loop L_horiz_blur_row

    ;;;; end horizontal blur

        mov cx, 256
        mov si, particles
        L_foreach_particle:
            push cx

            ;;;; sense

            mov ax, [si+2] ; loads velx into al and vely into ah

            sar al, 5 ; velocity is a 2.6 fixed-point number, we need a multiple of it here
            sar ah, 5

            ; (bl, bh) = (-vely, velx): the scaled velocity rotated by 90 degrees
            mov bh, al
            mov bl, 0
            sub bl, ah

            mov cx, [si] ; sensor position, starting with particle pos
            add cl, al
            add ch, ah

            add cl, bl
            add ch, bh

            ; Sample the trail at the sensor. The index is built exactly like
            ; in the deposit step: row masked to 0..127, offset from trailmap.
            mov di, cx
            and di, 0x7FFF
            mov dl, [trailmap + di]


            ; look the other way: move by twice the sideways vector
            sub cl, bl
            sub ch, bh
            sub cl, bl
            sub ch, bh

            mov di, cx
            and di, 0x7FFF
            mov dh, [trailmap + di]


            ; dl is the trail sample left, dh is the trail sample right


            mov cx, [si+2] ; loads velx into cl vely into ch

            ; Trail intensities are unsigned bytes (0..255), so the
            ; comparison must use the unsigned conditions.
            cmp dl, dh
            ja B_turn_left
            jb B_turn_right
            jmp B_turn_end



            B_turn_left:
                add cl, bl
                add ch, bh
                jmp B_turn_end

            B_turn_right:
                sub cl, bl
                sub ch, bh

            B_turn_end:


            mov [si+2], cx ; stores cl into velx and ch into vely


            ;;; end sense

            ;;; move

            mov ax, cx ; al = velx, ah = vely (the values just stored)

            sar al, 6 ; velocity is a 2.6 fixed-point number
            sar ah, 6

            add al, [si]
            add ah, [si+1]

            mov [si], ax ; stores al into posx and ah into posy

            ;;; end move


            ;;; deposit

            ; ax is already the position (al = posx, ah = posy)
            and ah, 0x7F ; there are only 128 rows
            mov di, ax   ; the 256 columns make indexing neat
            mov byte [trailmap + di], 0xFF-8 ; just set it to high, no overflows (the -8 is because of the rounding while blitting)

            ;;; end deposit

        pop cx
        add si, 4 ; next 4-byte particle record
        loop L_foreach_particle





;;;;;;;; copy trail map to screen ;;;;;;;;

    ; Set ES to video memory segment
    mov ax, 0xA000
    mov es, ax

    ; Calculate offset in video memory to center the bitmap
    ; (320-256)/2 = 32 pixels horizontally
    ; (200-128)/2 = 36 pixels vertically
    mov di, (36*320)+32   ; Destination offset in video memory

    ; Set DS:SI to the source bitmap data
    mov si, trailmap

    ; Copy loop
    mov cx, 128           ; Number of rows
    L_copy_row:
        push cx

        mov cx, 256           ; Number of bytes per row to copy
        L_copy_pixel:
            ; Map the intensity to one of 16 colours starting at index 16.
            ; Stored intensities never exceed 0xFF-8 (the deposit value, and
            ; the blur only averages), so the rounding add cannot wrap.
            lodsb                  ; Load byte from DS:SI into AL, increment SI
            add al, 8 ; rounding
            shr al, 4
            add al, 16 ; 32 for rainbow

            stosb                  ; Store byte from AL to ES:DI, increment DI
            loop L_copy_pixel

        ; Add offset to DI to jump to the next line
        add di, 320-256        ; 320 (screen width) - 256 (bitmap width)

        pop cx
        loop L_copy_row

    ; Restore the interrupted code's registers in reverse order
    pop es
    pop ds
    popa

iret ; I_main is an interrupt handler, so we must return with iret

